In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
import operator
import seaborn as sns
%matplotlib inline
In [2]:
n_year_storms = pd.read_csv('data/n_year_storms_ohare_noaa.csv')
n_year_storms['start_time'] = pd.to_datetime(n_year_storms['start_time'])
n_year_storms['end_time'] = pd.to_datetime(n_year_storms['end_time'])
n_year_storms.head()
Out[2]:
In [3]:
year_event_100 = n_year_storms[n_year_storms['n'] == 100]
year_event_100
Out[3]:
In [4]:
rain_df = pd.read_csv('data/ohare_hourly_20160929.csv')
rain_df['datetime'] = pd.to_datetime(rain_df['datetime'])
rain_df = rain_df.set_index(pd.DatetimeIndex(rain_df['datetime']))
rain_df = rain_df['19700101':]
chi_rain_series = rain_df['HOURLYPrecip'].resample('1H', label='right').max().fillna(0)
chi_rain_series.head()
Out[4]:
In [5]:
# N-Year Storm variables
# These define the thresholds laid out by bulletin 70, and transfer mins and days to hours
n_year_threshes = pd.read_csv('../../n-year/notebooks/data/n_year_definitions.csv')
n_year_threshes = n_year_threshes.set_index('Duration')
dur_str_to_hours = {
'5-min':5/60.0,
'10-min':10/60.0,
'15-min':15/60.0,
'30-min':0.5,
'1-hr':1.0,
'2-hr':2.0,
'3-hr':3.0,
'6-hr':6.0,
'12-hr':12.0,
'18-hr':18.0,
'24-hr':24.0,
'48-hr':48.0,
'72-hr':72.0,
'5-day':5*24.0,
'10-day':10*24.0
}
n_s = [int(x.replace('-year','')) for x in reversed(list(n_year_threshes.columns.values))]
duration_strs = sorted(dur_str_to_hours.items(), key=operator.itemgetter(1), reverse=False)
n_year_threshes
Out[5]:
In [6]:
# Find n-year storms and store them in a data frame.
def find_n_year_storms(start_time_str, end_time_str, n):
start_time = pd.to_datetime(start_time_str)
end_time = pd.to_datetime(end_time_str)
n_index = n_s.index(n)
next_n = n_s[n_index-1] if n_index != 0 else None
storms = []
for duration_tuple in duration_strs:
duration_str = duration_tuple[0]
low_thresh = n_year_threshes.loc[duration_str, str(n) + '-year']
high_thresh = n_year_threshes.loc[duration_str, str(next_n) + '-year'] if next_n is not None else None
duration = int(dur_str_to_hours[duration_str])
sub_series = chi_rain_series[start_time: end_time]
rolling = sub_series.rolling(window=int(duration), min_periods=0).sum()
if high_thresh is not None:
event_endtimes = rolling[(rolling >= low_thresh) & (rolling < high_thresh)].sort_values(ascending=False)
else:
event_endtimes = rolling[(rolling >= low_thresh)].sort_values(ascending=False)
for index, event_endtime in event_endtimes.iteritems():
this_start_time = index - timedelta(hours=duration)
if this_start_time < start_time:
continue
storms.append({'n': n, 'end_time': index, 'inches': event_endtime, 'duration_hrs': duration,
'start_time': this_start_time})
return pd.DataFrame(storms)
In [7]:
storm1 = chi_rain_series['1987-08-11 23:00:00':'1987-08-21 23:00:00']
storm1.cumsum().plot(title="Cumulative rainfall over 1987 100-year storm")
Out[7]:
In [8]:
# The rainfall starts at...
storm1[storm1 > 0].index[0]
Out[8]:
In [9]:
storm1 = storm1['1987-08-13 22:00:00':]
storm1.head()
Out[9]:
In [10]:
# There are two periods of drastic rise in rain. Print out the percent of the storm that has fallen hourly to see that the
# first burst ends at 8/14 10AM
storm1.cumsum()/storm1.sum()
Out[10]:
In [11]:
# Looking for an n-year storm in the small period of drastic increase #1
find_n_year_storms('1987-08-13 22:00:00', '1987-08-14 10:00:00', 100)
Out[11]:
In [12]:
# Let's look for the second jump in precip
storm1['1987-08-16 12:00:00':].cumsum()/storm1.sum()
Out[12]:
In [71]:
# Looking for an n-year storm in the small period of drastic increase #2
find_n_year_storms('1987-08-16 20:00:00', '1987-08-17 00:00:00', 10)
Out[71]:
In [14]:
storm2 = chi_rain_series['2008-09-04 13:00:00':'2008-09-14 13:00:00']
storm2.cumsum().plot(title="Cumulative rainfall over 2008 100-year storm")
Out[14]:
In [16]:
total_rainfall = storm2.sum()
total_rainfall
Out[16]:
In [17]:
storm2.cumsum()/total_rainfall
Out[17]:
In [26]:
# First downpour is a 1-year storm
find_n_year_storms('2008-09-04 13:00:00', '2008-09-04 21:00:00', 1)
Out[26]:
In [29]:
storm2['2008-09-08 00:00:00':'2008-09-09 00:00:00'].cumsum()/total_rainfall
Out[29]:
In [37]:
find_n_year_storms('2008-09-08 10:00:00', '2008-09-08 20:00:00', 1)
Out[37]:
In [38]:
chi_rain_series['2008-09-08 10:00:00':'2008-09-08 20:00:00'].sum()
Out[38]:
In [39]:
# No n-year events for second downpour
In [41]:
# Downpour 3
storm2['2008-09-12 12:00:00':'2008-09-13 15:00:00'].cumsum()/total_rainfall
Out[41]:
In [45]:
find_n_year_storms('2008-09-12 12:00:00','2008-09-13 15:00:00',50)
Out[45]:
In [48]:
storm3 = chi_rain_series['2011-07-22 08:00:00':'2011-07-23 08:00:00']
storm3.cumsum().plot(title="Cumulative rainfall over 2011 100-year storm")
Out[48]:
In [49]:
storm3['2011-07-22 22:00:00':'2011-07-23 05:00:00'].cumsum()/storm3.sum()
Out[49]:
In [50]:
find_n_year_storms('2011-07-22 22:00:00', '2011-07-23 05:00:00', 100)
Out[50]:
In [ ]:
In [17]:
chi_rain_series['2011-07-22 08:00:00':'2011-07-23 08:00:00'].cumsum().plot(title="Cumulative rainfall over 2011 100-year storm")
Out[17]:
In [18]:
chi_rain_series['2010-07-23 16:00:00':'2010-07-24 16:00:00'].cumsum().plot(title="Cumulative rainfall over 2010 50-year storm")
Out[18]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [2]:
# The following code is copied verbatim from @pjsier Rolling Rain N-Year Threshold.pynb
# Loading in hourly rain data from CSV, parsing the timestamp, and adding it as an index so it's more useful
rain_df = pd.read_csv('data/ohare_hourly_observations.csv')
rain_df['datetime'] = pd.to_datetime(rain_df['datetime'])
rain_df = rain_df.set_index(pd.DatetimeIndex(rain_df['datetime']))
print(rain_df.dtypes)
rain_df.head()
Out[2]:
In [3]:
chi_rain_series = rain_df['hourly_precip'].resample('1H').max()
In [4]:
# This is where I break with @pjsier
In [5]:
# I am assuming here that a single hour cannot be part of more than one storm in the event_endtimes list.
# Therefore, I am looping through the list and throwing out any storms that include hours from heavier storms in the
# same block of time.=
def get_storms_without_overlap(event_endtimes, hours):
times_taken = []
ret_val = []
for i in range(len(event_endtimes)):
timestamp = event_endtimes.iloc[i].name
times_here = []
for h in range(hours):
times_here.append(timestamp - pd.DateOffset(hours=h))
if not bool(set(times_here) & set(times_taken)):
times_taken.extend(times_here)
ret_val.append({'start': timestamp - pd.DateOffset(hours=hours), 'end': timestamp, 'inches': event_endtimes.iloc[i]['hourly_precip']})
return ret_val
In [6]:
# Find the 100 year event. First, define the storm as based in Illinois Bulletin 70 as the number of inches
# of precipition that falls over a given span of straight hours.
_100_year_storm_milestones = [{'hours': 240, 'inches': 11.14}, {'hours':120, 'inches': 9.96},
{'hours': 72, 'inches': 8.78}, {'hours': 48, 'inches': 8.16}, {'hours': 24, 'inches': 7.58},
{'hours': 18, 'inches': 6.97}, {'hours': 12, 'inches': 6.59}, {'hours': 6, 'inches': 5.68},
{'hours': 3, 'inches': 4.9}, {'hours': 2, 'inches': 4.47}, {'hours': 1, 'inches': 3.51}]
all_storms = []
print("\tSTART\t\t\tEND\t\t\tINCHES")
for storm_hours in _100_year_storm_milestones:
rolling = pd.DataFrame(chi_rain_series.rolling(window=storm_hours['hours']).sum())
event_endtimes = rolling[(rolling['hourly_precip'] >= storm_hours['inches'])]
event_endtimes = event_endtimes.sort_values(by='hourly_precip', ascending=False)
storms = get_storms_without_overlap(event_endtimes, storm_hours['hours'])
if len(storms) > 0:
print("Across %s hours" % storm_hours['hours'])
for storm in storms:
print('\t%s\t%s\t%s inches' % (storm['start'], storm['end'], storm['inches']))
all_storms.extend(storms)
In [7]:
# Analysis Questions
# 1/25/2015 - 2/4/2015 - Worst storm by far in quantity, but Jan-Feb -- is it snow?
# 9/4/2008 - 9/14/2008 - This only appeared on the 10-day event, so it must've been well distributed across the days?
# 7/21/2011 - 7/23/2011 - Very heavy summer storm!
In [8]:
# Examining the storm from 7/21-2011 - 7/23/2011
import datetime
july_2011_storm = chi_rain_series.loc[(chi_rain_series.index >= datetime.datetime(2011,7,20)) & (chi_rain_series.index <= datetime.datetime(2011,7,24))]
july_2011_storm.head()
Out[8]:
In [9]:
july_2011_storm.plot()
Out[9]:
In [10]:
# Let's take a look at the cumulative buildup of the storm over time
cumulative_rainj11 = pd.DataFrame(july_2011_storm).hourly_precip.cumsum()
cumulative_rainj11.head()
Out[10]:
In [11]:
cumulative_rainj11.plot()
Out[11]:
In [12]:
cumulative_rainj11.loc[(cumulative_rainj11.index >= datetime.datetime(2011,7,22,21,0,0)) & (cumulative_rainj11.index <= datetime.datetime(2011,7,23,5,0,0))]
Out[12]:
In [13]:
# We got a crazy, crazy downpour from about 11:00PM until 2:00AM. That alone was a 100-year storm, where we got 6.79 inches
# in 3 hours. That would've been a 100-year storm if we'd have gotten that in 12 hours!